GLM Alternative Simulation-based dispersion test: Approximation of Pearson residuals
Simulated data
# Simulated data ####
load(here("data","6_approxPear_pois.Rdata"))
simpois <- map_dfr(out.pois, "simulations", .id="ngroups") %>%
separate(ngroups, c("nSim", "sampleSize", "intercept"), sep="_") %>%
rename("overdispersion" = "controlValues") %>%
mutate(nSim = as.factor(as.numeric(nSim)))
load(here("data","6_approxPear_bin.Rdata"))
simbin <- map_dfr(out.bin, "simulations", .id="ngroups") %>%
separate(ngroups, c("nSim", "sampleSize", "intercept"), sep="_") %>%
rename("overdispersion" = "controlValues") %>%
mutate(nSim = as.factor(as.numeric(nSim)))Percentage of zeros
looking at the percentage of zeros in the simulations
simpois %>%
ggplot(aes(x=as.factor(overdispersion), y=prop.zero, col=as.factor(sampleSize))) +
geom_boxplot()+
facet_grid(nSim~intercept, scales="free") +
labs(title="Poisson: Prop obs with zero SD simulated data") +
theme(panel.background = element_rect(color="black"))simbin %>%
ggplot(aes(x=as.factor(overdispersion), y=prop.zero, col=as.factor(sampleSize))) +
geom_boxplot()+
facet_grid(nSim~intercept, scales="free") +
labs(title="Binomial: Prop obs with zero SD simulated data") +
theme(panel.background = element_rect(color="black"))Results without zeros
type I error
p.pois <- simpois %>% filter(prop.zero == 0, overdispersion == 0) %>%
select(sampleSize,intercept, nSim, ends_with(".p")) %>%
pivot_longer(4:6, names_to = "test", values_to = "p.val") %>%
group_by(sampleSize, intercept, nSim, test) %>%
summarise(p.sig = sum(p.val<0.05,na.rm=T),
nsim = length(p.val[!is.na(p.val)]))## `summarise()` has grouped output by 'sampleSize', 'intercept', 'nSim'. You can
## override using the `.groups` argument.
p.pois$prop.sig <- p.pois$p.sig/p.pois$nsim
p.pois$nSim <- as.factor(p.pois$nSim)
# add sig test
for (i in 1:nrow(p.pois)) {
btest <- binom.test(p.pois$p.sig[i], n=p.pois$nsim[i], p=0.05)
p.pois$p.bin0.05[i] <- btest$p.value
p.pois$conf.low[i] <- btest$conf.int[1]
p.pois$conf.up[i] <- btest$conf.int[2]
}## Warning: Unknown or uninitialised column: `p.bin0.05`.
## Warning: Unknown or uninitialised column: `conf.low`.
## Warning: Unknown or uninitialised column: `conf.up`.
p.bin <- simbin %>% filter(prop.zero == 0, overdispersion == 0) %>%
select(sampleSize,intercept, nSim, ends_with(".p")) %>%
pivot_longer(4:6, names_to = "test", values_to = "p.val") %>%
group_by(sampleSize, intercept, nSim, test) %>%
summarise(p.sig = sum(p.val<0.05,na.rm=T),
nsim = length(p.val[!is.na(p.val)]))## `summarise()` has grouped output by 'sampleSize', 'intercept', 'nSim'. You can
## override using the `.groups` argument.
p.bin$prop.sig <- p.bin$p.sig/p.bin$nsim
p.bin$nSim <- as.factor(p.bin$nSim)
# add sig test
for (i in 1:nrow(p.bin)) {
btest <- binom.test(p.bin$p.sig[i], n=p.bin$nsim[i], p=0.05)
p.bin$p.bin0.05[i] <- btest$p.value
p.bin$conf.low[i] <- btest$conf.int[1]
p.bin$conf.up[i] <- btest$conf.int[2]
}## Warning: Unknown or uninitialised column: `p.bin0.05`.
## Warning: Unknown or uninitialised column: `conf.low`.
## Warning: Unknown or uninitialised column: `conf.up`.
p.pois %>%
ggplot(aes(x=nSim, y=prop.sig, col=intercept))+
facet_grid(sampleSize~test)+
geom_point(position = position_dodge(width = 0.2)) +
geom_line(position = position_dodge(width = 0.2), aes(x=as.numeric(nSim))) +
geom_errorbar(aes(ymin=conf.low, ymax=conf.up), width=0.02,
position = position_dodge(width = 0.2)) +
scale_y_sqrt(breaks=c(0,0.01, 0.05,0.25,0.5, 0.75)) +
geom_hline(yintercept = 0.05, linetype = "dotted")+
labs(title= "Poisson: Type I error") +
theme(panel.background = element_rect(color="black"))p.bin %>%
ggplot(aes(x=nSim, y=prop.sig, col=intercept))+
facet_grid(sampleSize~test)+
geom_point(position = position_dodge(width = 0.2)) +
geom_line(position = position_dodge(width = 0.2), aes(x=as.numeric(nSim))) +
geom_errorbar(aes(ymin=conf.low, ymax=conf.up), width=0.02,
position = position_dodge(width = 0.2)) +
scale_y_sqrt(breaks=c(0,0.01, 0.05,0.25,0.5, 0.75)) +
geom_hline(yintercept = 0.05, linetype = "dotted")+
labs(title= "Binomial: Type I error") Figure for supplementary material
bind_rows(list(p.bin, p.pois), .id="model") %>%
filter(nSim %in% c("250", "1000")) %>%
ggplot(aes(x=sampleSize, y=prop.sig, col=intercept, linetype=nSim,shape=nSim))+
facet_grid(model~test,
labeller= as_labeller(c("Alt.p"= "Approximate Pearson",
"DHA.p" = "Sim-based residuals",
"Pear.p" = "Pearson Chi-squared",
"2"= "Poisson",
"1" = "Binomial"))) +
geom_point(position = position_dodge(0.7)) +
ylab("Type I error rate") + xlab("Sample size (n)")+
geom_errorbar(aes(ymin=conf.low, ymax=conf.up), width=0.02,
position = position_dodge(0.7)) +
geom_hline(yintercept = 0.05, linetype = "dotted") Dispersion statistics
Zero overdispersion
dispersion expected = 1
dispois <- simpois %>% filter(prop.zero ==0, overdispersion==0) %>%
select(sampleSize, intercept, nSim, ends_with("dispersion")) %>%
pivot_longer(4:6,names_to = "test", values_to = "disp.statistics") %>%
mutate(nSim = as.factor(nSim))
disbin <- simbin %>% filter(prop.zero ==0, overdispersion==0) %>%
select(sampleSize, intercept, nSim, ends_with("dispersion")) %>%
pivot_longer(4:6,names_to = "test", values_to = "disp.statistics") %>%
mutate(nSim = as.factor(nSim))dispois %>%
ggplot(aes(x = nSim, y = disp.statistics, col = intercept))+
geom_boxplot() +
geom_hline(yintercept = 1, linetype="dashed") +
scale_y_log10() +
facet_grid(sampleSize~test, scales="free")+
labs(title= "Poisson: Dispersion statistics") +
theme(panel.background = element_rect(color="black"))disbin %>%
ggplot(aes(x = nSim, y = disp.statistics, col = intercept))+
geom_boxplot() +
geom_hline(yintercept = 1, linetype="dashed") +
#scale_y_log10() +
facet_grid(sampleSize~test, scales="free")+
labs(title= "Binomial: Dispersion statistics") +
theme(panel.background = element_rect(color="black"))differences between dispersions stats and Pearson
dispois2 <- simpois %>% filter(prop.zero ==0, overdispersion==0) %>%
mutate(P.DHA = Pear.stat.dispersion - DHA.stat.dispersion,
P.Alt = Pear.stat.dispersion - Alt.stat.dispersion) %>%
select(sampleSize,intercept,nSim, P.DHA, P.Alt) %>%
pivot_longer(4:5,names_to = "test", values_to = "dif")
disbin2 <- simbin %>% filter(prop.zero ==0, overdispersion==0) %>%
mutate(P.DHA = Pear.stat.dispersion - DHA.stat.dispersion,
P.Alt = Pear.stat.dispersion - Alt.stat.dispersion) %>%
select(sampleSize,intercept,nSim, P.DHA, P.Alt) %>%
pivot_longer(4:5,names_to = "test", values_to = "dif")dispois2 %>%
ggplot(aes(x = as.factor(nSim), y = dif, col = intercept))+
geom_boxplot() +
geom_hline(yintercept = 0, linetype="dashed") +
facet_grid(sampleSize~test, scales="free")+
labs(title= "Poisson Dispersion statistics",
subtitle = "Difference with Pearson Disp stat") +
theme(panel.background = element_rect(color="black")) #+dispois2 %>% filter(nSim %in% c(250,1000)) %>%
ggplot(aes(x = as.factor(nSim), y = dif, col = intercept))+
geom_boxplot() +
ylab("Dif (Pearson - disp)")+
geom_hline(yintercept = 0, linetype="dashed") +
facet_grid(sampleSize~test, scales="free")+
labs(title= "Poisson Dispersion statistics",
subtitle = "Difference with Pearson Disp stat") +
theme(panel.background = element_rect(color="black")) #+disbin2 %>%
ggplot(aes(x = as.factor(nSim), y = dif, col = intercept))+
geom_boxplot() +
geom_hline(yintercept = 0, linetype="dashed") +
facet_grid(sampleSize~test, scales="free")+
labs(title= "Binomial: Dispersion statistics",
subtitle = "Difference with Pearson Disp stat") +
theme(panel.background = element_rect(color="black")) #+disbin2 %>% filter(nSim %in% c(250,1000)) %>%
ggplot(aes(x = as.factor(nSim), y = dif, col = intercept))+
geom_boxplot() +
ylab("Dif (Pearson - disp)")+
geom_hline(yintercept = 0, linetype="dashed") +
facet_grid(sampleSize~test, scales="free")+
labs(title= "Binomial: Dispersion statistics",
subtitle = "Difference with Pearson Disp stat") +
theme(panel.background = element_rect(color="black")) dipersion with overdispersion
disperpois <- simpois %>% filter(prop.zero ==0) %>%
select(sampleSize, intercept, nSim, ends_with("dispersion")) %>%
pivot_longer(4:6,names_to = "test", values_to = "disp.statistics") %>%
mutate(nSim = as.factor(nSim))
disperbin <- simbin %>% filter(prop.zero ==0) %>%
select(sampleSize, intercept, nSim, ends_with("dispersion")) %>%
pivot_longer(4:6,names_to = "test", values_to = "disp.statistics") %>%
mutate(nSim = as.factor(nSim))disperpois %>% group_by(overdispersion,sampleSize, nSim, test, intercept) %>%
summarise(mean.disp = mean(disp.statistics)) %>%
ggplot(aes(x = overdispersion, y = mean.disp, col = test, linetype=intercept))+
geom_line() +
geom_hline(yintercept = 1, linetype="dashed") +
scale_y_log10() +
facet_grid(sampleSize~nSim, scales="free")+
labs(title= "Poisson: Dispersion statistics") +
theme(panel.background = element_rect(color="black"),
legend.position = "bottom")## `summarise()` has grouped output by 'overdispersion', 'sampleSize', 'nSim',
## 'test'. You can override using the `.groups` argument.
disperpois %>% group_by(overdispersion,sampleSize, nSim, test, intercept) %>%
summarise(mean.disp = mean(disp.statistics)) %>%
filter(nSim %in% c(250, 1000)) %>%
ggplot(aes(x = overdispersion, y = mean.disp, col = test, linetype=intercept))+
geom_line() +
geom_hline(yintercept = 1, linetype="dashed") +
scale_y_log10() +
facet_grid(sampleSize~nSim, scales="free")+
labs(title= "Poisson: Dispersion statistics") +
theme(panel.background = element_rect(color="black"))## `summarise()` has grouped output by 'overdispersion', 'sampleSize', 'nSim',
## 'test'. You can override using the `.groups` argument.
disperbin %>% group_by(overdispersion,sampleSize, nSim, test, intercept) %>%
summarise(mean.disp = mean(disp.statistics)) %>%
ggplot(aes(x = overdispersion, y = mean.disp, col = test, linetype=intercept))+
geom_line() +
geom_hline(yintercept = 1, linetype="dashed") +
scale_y_log10() +
facet_grid(sampleSize~nSim, scales="free")+
labs(title= "Binomial: Dispersion statistics") +
theme(panel.background = element_rect(color="black"),
legend.position = "bottom")## `summarise()` has grouped output by 'overdispersion', 'sampleSize', 'nSim',
## 'test'. You can override using the `.groups` argument.
disperbin %>% group_by(overdispersion,sampleSize, nSim, test, intercept) %>%
summarise(mean.disp = mean(disp.statistics)) %>%
filter(nSim %in% c(250, 1000)) %>%
ggplot(aes(x = overdispersion, y = mean.disp, col = test, linetype=intercept))+
geom_line() +
geom_hline(yintercept = 1, linetype="dashed") +
scale_y_log10() +
facet_grid(sampleSize~nSim, scales="free")+
labs(title= "Binomial: Dispersion statistics") +
theme(panel.background = element_rect(color="black"))## `summarise()` has grouped output by 'overdispersion', 'sampleSize', 'nSim',
## 'test'. You can override using the `.groups` argument.
Figure for supplementary material
disp <- bind_rows(list(disperbin %>%
group_by(overdispersion,sampleSize, nSim, test, intercept) %>%
summarise(mean.disp = mean(disp.statistics)) ,
disperpois %>%
group_by(overdispersion,sampleSize, nSim, test, intercept) %>%
summarise(mean.disp = mean(disp.statistics))), .id="model") %>%
filter(nSim %in% c("250", "1000"),
intercept == "0")## `summarise()` has grouped output by 'overdispersion', 'sampleSize', 'nSim',
## 'test'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'overdispersion', 'sampleSize', 'nSim',
## 'test'. You can override using the `.groups` argument.
ggplot(disp, aes(x=overdispersion, y=mean.disp, col=test, linetype=nSim, shape=nSim))+
facet_grid(model~sampleSize,
labeller= as_labeller(c("100" = "n = 100",
"1000" = "n = 1000",
"2"= "Poisson",
"1" = "Binomial"))) +
geom_line() +
scale_color_discrete(labels = c("Alternative Sim-based",
"Sim-based residuals",
"Pearson Chi-squared"))+
ylab("Mean dispersion") + xlab("Overdispersion")Power
powerpois <- simpois %>% filter(prop.zero == 0,) %>%
select(sampleSize,intercept, nSim, overdispersion, ends_with(".p")) %>%
pivot_longer(5:7, names_to = "test", values_to = "p.val") %>%
group_by(sampleSize, intercept, nSim, test, overdispersion) %>%
summarise(p.sig = sum(p.val<0.05,na.rm=T),
nsim = length(p.val[!is.na(p.val)]))## `summarise()` has grouped output by 'sampleSize', 'intercept', 'nSim', 'test'.
## You can override using the `.groups` argument.
powerpois$prop.sig <- powerpois$p.sig/powerpois$nsim
powerpois$nSim <- as.factor(powerpois$nSim)
# add sig test
for (i in 1:nrow(powerpois)) {
btest <- binom.test(powerpois$p.sig[i], n=powerpois$nsim[i], p=0.05)
powerpois$p.bin0.05[i] <- btest$p.value
powerpois$conf.low[i] <- btest$conf.int[1]
powerpois$conf.up[i] <- btest$conf.int[2]
}## Warning: Unknown or uninitialised column: `p.bin0.05`.
## Warning: Unknown or uninitialised column: `conf.low`.
## Warning: Unknown or uninitialised column: `conf.up`.
powerbin <- simbin %>% filter(prop.zero == 0,) %>%
select(sampleSize,intercept, nSim, overdispersion, ends_with(".p")) %>%
pivot_longer(5:7, names_to = "test", values_to = "p.val") %>%
group_by(sampleSize, intercept, nSim, test, overdispersion) %>%
summarise(p.sig = sum(p.val<0.05,na.rm=T),
nsim = length(p.val[!is.na(p.val)]))## `summarise()` has grouped output by 'sampleSize', 'intercept', 'nSim', 'test'.
## You can override using the `.groups` argument.
powerbin$prop.sig <- powerbin$p.sig/powerbin$nsim
powerbin$nSim <- as.factor(powerbin$nSim)
# add sig test
for (i in 1:nrow(powerbin)) {
btest <- binom.test(powerbin$p.sig[i], n=powerbin$nsim[i], p=0.05)
powerbin$p.bin0.05[i] <- btest$p.value
powerbin$conf.low[i] <- btest$conf.int[1]
powerbin$conf.up[i] <- btest$conf.int[2]
}## Warning: Unknown or uninitialised column: `p.bin0.05`.
## Warning: Unknown or uninitialised column: `conf.low`.
## Warning: Unknown or uninitialised column: `conf.up`.
powerpois %>%
ggplot(aes(x = overdispersion, y = prop.sig, col = test, linetype=intercept))+
geom_line() +
geom_hline(yintercept = 0) +
facet_grid(sampleSize~nSim, scales="free")+
labs(title= "Poisson:Power") +
theme(panel.background = element_rect(color="black"),
legend.position= "bottom")powerpois %>% filter(nSim %in% c("250", "1000")) %>%
ggplot(aes(x = overdispersion, y = prop.sig, col = test, linetype=intercept))+
geom_line() +
geom_hline(yintercept = 0) +
facet_grid(sampleSize~nSim, scales="free")+
labs(title= "Poisson: Power") +
theme(panel.background = element_rect(color="black"),
legend.position= "bottom")powerbin %>%
ggplot(aes(x = overdispersion, y = prop.sig, col = test, linetype=intercept))+
geom_line() +
geom_hline(yintercept = 0) +
facet_grid(sampleSize~nSim, scales="free")+
labs(title= "Binomial: Power") +
theme(panel.background = element_rect(color="black"),
legend.position= "bottom")powerbin %>% filter(nSim %in% c("250", "1000")) %>%
ggplot(aes(x = overdispersion, y = prop.sig, col = test, linetype=intercept))+
geom_line() +
geom_hline(yintercept = 0) +
facet_grid(sampleSize~nSim, scales="free")+
labs(title= "Binomial: Power") +
theme(panel.background = element_rect(color="black"),
legend.position= "bottom")Figure for supplementary material
pows <- bind_rows(list(powerbin, powerpois), .id="model") %>%
filter(nSim %in% c("250", "1000"),
intercept == "0")
ggplot(pows, aes(x=overdispersion, y=prop.sig, col=test, linetype=nSim))+
facet_grid(model~sampleSize,
labeller= as_labeller(c("100" = "n = 100",
"1000" = "n = 1000",
"2"= "Poisson",
"1" = "Binomial"))) +
geom_line() +
scale_color_discrete(labels = c("Alternative Sim-based",
"Sim-based residuals",
"Pearson Chi-squared"))+
ylab("Power") + xlab("Overdispersion")Comparing residuals: approximate pearson and the pearson residuals
Comparing the coefficients of the regression:
lm(alterna$resPA - resP ~ resP)
expecting intercept and slope in zero.
Slope
slopepois <- simpois %>% filter(prop.zero == 0) %>%
select(sampleSize, intercept, nSim, overdispersion, resPA.slope, resPA.slopeP) %>%
mutate(nSim = as.factor(nSim))
slopebin <- simbin %>% filter(prop.zero == 0) %>%
select(sampleSize, intercept, nSim, overdispersion, resPA.slope, resPA.slopeP) %>%
mutate(nSim = as.factor(nSim))slopepois %>%
ggplot(aes(x=as.factor(overdispersion), y=resPA.slope, col=intercept))+
geom_boxplot() +
facet_grid(sampleSize ~ nSim, scales="free") +
geom_hline(yintercept = 0, linetype="dotted")+
labs(title = "Poisson: slope") +
theme(panel.background = element_rect(color = "black"),
legend.position = "bottom")slopepois %>% filter(nSim %in% c("250","1000")) %>%
ggplot(aes(x=as.factor(overdispersion), y=resPA.slope, col=intercept))+
geom_boxplot() +
facet_grid(sampleSize ~ nSim) +
geom_hline(yintercept = 0, linetype="dotted")+
labs(title = "Poisson: slope") +
theme(panel.background = element_rect(color = "black"),
legend.position = "bottom")slopepois %>% filter(nSim %in% c("250","1000")) %>%
group_by(nSim, sampleSize, intercept, overdispersion) %>%
summarise(mean.slope = mean(resPA.slope),
median.slope = median(resPA.slope)) %>%
ggplot(aes(x=overdispersion, y=mean.slope, col=intercept)) +
geom_line()+ geom_point()+
facet_grid(sampleSize ~ nSim) +
geom_hline(yintercept = 0, linetype="dotted") +
labs(title = "Poisson: mean slope") +
theme(panel.background = element_rect(color = "black"))## `summarise()` has grouped output by 'nSim', 'sampleSize', 'intercept'. You can
## override using the `.groups` argument.
slopebin %>%
ggplot(aes(x=as.factor(overdispersion), y=resPA.slope, col=intercept))+
geom_boxplot() +
facet_grid(sampleSize ~ nSim) +
labs(title = "Binomial: slope") +
theme(panel.background = element_rect(color = "black"),
legend.position = "bottom")slopebin %>% filter(nSim %in% c("250","1000")) %>%
ggplot(aes(x=as.factor(overdispersion), y=resPA.slope, col=intercept))+
geom_boxplot() +
facet_grid(sampleSize ~ nSim) +
labs(title = "Binomial: slope") +
theme(panel.background = element_rect(color = "black"))slopebin %>% filter(nSim %in% c("250","1000")) %>%
group_by(nSim, sampleSize, intercept, overdispersion) %>%
summarise(mean.slope = mean(resPA.slope),
median.slope = median(resPA.slope)) %>%
ggplot(aes(x=overdispersion, y=mean.slope, col=intercept)) +
geom_line()+ geom_point()+
facet_grid(sampleSize ~ nSim) +
labs(title="Binomial: mean slope", subtitle = "(App.Pear - Pear) ~ Pear") +
theme(panel.background = element_rect(color = "black"))## `summarise()` has grouped output by 'nSim', 'sampleSize', 'intercept'. You can
## override using the `.groups` argument.
intercept
interpois <- simpois %>% filter(prop.zero ==0) %>%
select(sampleSize, intercept, nSim, overdispersion, resPA.inter, resPA.interP) %>%
mutate(nSim = as.factor(nSim))
interbin <- simbin %>% filter(prop.zero ==0) %>%
select(sampleSize, intercept, nSim, overdispersion, resPA.inter, resPA.interP) %>%
mutate(nSim = as.factor(nSim))interpois %>%
ggplot(aes(x=as.factor(overdispersion), y=resPA.inter, col=intercept))+
geom_boxplot() +
facet_grid(sampleSize ~ nSim, scales="free") +
geom_hline(yintercept = 0, linetype="dotted")+
labs(title="Poisson: intercept")+
theme(panel.background = element_rect(color = "black"))interpois %>% filter(nSim %in% c("250","1000")) %>%
ggplot(aes(x=as.factor(overdispersion), y=resPA.inter, col=intercept))+
geom_boxplot() +
facet_grid(sampleSize ~ nSim) +
geom_hline(yintercept = 0, linetype="dotted")+
labs(title="Poisson: intercept")+
theme(panel.background = element_rect(color = "black"))interpois %>% filter(nSim %in% c("250","1000")) %>%
group_by(nSim, sampleSize, intercept, overdispersion) %>%
summarise(mean.inter = mean(resPA.inter),
median.inter = median(resPA.inter)) %>%
ggplot(aes(x=overdispersion, y=mean.inter, col=intercept)) +
geom_line()+ geom_point()+
facet_grid(sampleSize ~ nSim) +
labs(title="Poisson: mean intercept")+
geom_hline(yintercept = 0, linetype="dotted")+
theme(panel.background = element_rect(color = "black"))## `summarise()` has grouped output by 'nSim', 'sampleSize', 'intercept'. You can
## override using the `.groups` argument.
interbin %>%
ggplot(aes(x=as.factor(overdispersion), y=resPA.inter, col=intercept))+
geom_boxplot() +
facet_grid(sampleSize ~ nSim, scales="free") +
geom_hline(yintercept = 0, linetype="dotted")+
labs(title="Binomial: intercept")+
theme(panel.background = element_rect(color = "black"))interbin %>% filter(nSim %in% c("250","1000")) %>%
ggplot(aes(x=as.factor(overdispersion), y=resPA.inter, col=intercept))+
geom_boxplot() +
facet_grid(sampleSize ~ nSim) +
geom_hline(yintercept = 0, linetype="dotted")+
labs(title="binomial: intercept")+
theme(panel.background = element_rect(color = "black"))interbin %>% filter(nSim %in% c("250","1000")) %>%
group_by(nSim, sampleSize, intercept, overdispersion) %>%
summarise(mean.inter = mean(resPA.inter),
median.inter = median(resPA.inter)) %>%
ggplot(aes(x=overdispersion, y=mean.inter, col=intercept)) +
geom_line()+ geom_point()+
facet_grid(sampleSize ~ nSim) +
geom_hline(yintercept = 0, linetype="dotted")+
labs(title="Binomial: mean intercept", subtitle = "(App.Pear - Pear) ~ Pear")+
theme(panel.background = element_rect(color = "black"))## `summarise()` has grouped output by 'nSim', 'sampleSize', 'intercept'. You can
## override using the `.groups` argument.
Figure for supplementary material
slopes <- bind_rows(list(Poisson = slopepois %>%
filter(nSim %in% c("250","1000")) %>%
group_by(nSim, sampleSize, intercept, overdispersion) %>%
summarise(mean.slope = mean(resPA.slope),
median.slope = median(resPA.slope)),
Binomial = slopebin %>%
filter(nSim %in% c("250","1000")) %>%
group_by(nSim, sampleSize, intercept, overdispersion) %>%
summarise(mean.slope = mean(resPA.slope),
median.slope = median(resPA.slope))),
.id="model")## `summarise()` has grouped output by 'nSim', 'sampleSize', 'intercept'. You can
## override using the `.groups` argument.
## `summarise()` has grouped output by 'nSim', 'sampleSize', 'intercept'. You can
## override using the `.groups` argument.
intercepts <- bind_rows(list(Poisson = interpois %>%
filter(nSim %in% c("250","1000")) %>%
group_by(nSim, sampleSize, intercept, overdispersion) %>%
summarise(mean.inter = mean(resPA.inter),
median.inter = median(resPA.inter)),
Binomial = interbin %>%
filter(nSim %in% c("250","1000")) %>%
group_by(nSim, sampleSize, intercept, overdispersion) %>%
summarise(mean.inter = mean(resPA.inter),
median.inter = median(resPA.inter))),
.id="model")## `summarise()` has grouped output by 'nSim', 'sampleSize', 'intercept'. You can
## override using the `.groups` argument.
## `summarise()` has grouped output by 'nSim', 'sampleSize', 'intercept'. You can
## override using the `.groups` argument.
figa <- ggplot(slopes, aes(x=overdispersion, y=mean.slope, col=intercept, linetype=nSim))+
geom_line()+
geom_point()+
geom_hline(yintercept = 0)+
ylab("Mean slope") + labs(tag="A)")+
facet_grid(model~sampleSize, scales="free",
labeller = as_labeller(c("100" = "n = 100",
"1000" = "n = 1000",
"Poisson"="Poisson",
"Binomial" = "Binomial")))
figb <- ggplot(intercepts, aes(x=overdispersion, y=mean.inter, col=intercept, linetype=nSim))+
geom_line()+
geom_point()+
geom_hline(yintercept = 0)+
ylab("Mean intercept") +labs(tag="B)")+
facet_grid(model~sampleSize, scales="free",
labeller = as_labeller(c("100" = "n = 100",
"1000" = "n = 1000",
"Poisson"="Poisson",
"Binomial" = "Binomial")) )
figa + figb + plot_layout(ncol=1, guides="collect")Results with zeros
Selecting only models with zeros and for simulations > 250.
zeropois <-simpois %>% filter(nSim %in% c("250", "1000"),
prop.zero >0)
zerobin <-simbin %>% filter(nSim %in% c("250", "1000"),
prop.zero >0)There are no model with any zero for Binomial with > 250 simulations.
For Poisson, there were only 30, with 100 obs and intercept -1.5
##
## -1.5
## 100 30
Percentage of zero:
ggplot(zeropois, aes(x=as.factor(overdispersion), y=prop.zero))+
geom_dotplot(binaxis="y",stackdir = "center")## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01000 0.01000 0.01000 0.01733 0.02000 0.06000
## sampleSize intercept nSim Pear.p DHA.p Alt.p
## 1 100 -1.5 250 0.8108863 0.928 0.528
Looking at their p-values
zeropois %>%
select(sampleSize,intercept, nSim, overdispersion, ends_with(".p")) %>%
pivot_longer(5:7, names_to = "test", values_to = "p.val") %>%
ggplot(aes(x=as.factor(overdispersion), y=p.val, col=test, )) +
geom_point()+
geom_hline(yintercept = 0.05, linetype="dashed")+
facet_grid(~test)Looking at dispersion stat
zeropois %>%
select(sampleSize,intercept, nSim, overdispersion, ends_with(".dispersion")) %>%
pivot_longer(5:7, names_to = "test", values_to = "disp") %>%
ggplot(aes(x=as.factor(overdispersion), y=disp, col=test, )) +
geom_point()+
geom_hline(yintercept = 1, linetype="dashed")+
facet_grid(~test)